package com.company.teddy;

import java.io.File;
import java.io.IOException;
import java.util.*;

/**
 * Static helpers that load the crawler's inputs from fixed locations on disk:
 * the list of site URLs, the per-site "sign" tokens, and the directory of
 * crawled HTML files.
 *
 * Created by v on 17-4-23.
 */
public class Read {
    /** File holding the whitespace-separated site URLs. */
    private static final String pathURL="/home/v/IdeaProjects/Teddy/set/url";

    /** File holding one "site sign sign ..." entry per line. */
    private static final String pathSign="";

    /** Directory holding the crawled HTML files. */
    private static final String pathHTML="";

    /**
     * Reads the site addresses from the file at {@code pathURL}.
     *
     * Every whitespace-separated token in the file is collected, and the
     * collected list is then handed to {@code TextProcessing.processList}.
     * If the file cannot be opened, the stack trace is printed and the
     * (empty) list is still passed on for processing.
     *
     * @return the result of {@code TextProcessing.processList} on the tokens read
     */
    public static List<String> ReadURL(){
        List<String> urls=new ArrayList<>();

        // try-with-resources guarantees the underlying file handle is released.
        try (Scanner input=new Scanner(new File(pathURL))) {
            while (input.hasNext()){
                urls.add(input.next());
            }
        }catch (IOException ex){
            ex.printStackTrace();
        }

        return TextProcessing.processList(urls);
    }

    /**
     * Reads the signs from the file at {@code pathSign}.
     *
     * Each non-blank line contributes one map entry: the first token is the
     * key and the remaining tokens (with {@code '%'} turned back into spaces)
     * form the value list. A later line with the same first token replaces
     * the earlier entry. Blank lines are ignored. If the file cannot be
     * opened, the stack trace is printed and an empty map is returned.
     *
     * @return a map from the first token of each line to its remaining tokens
     */
    public static HashMap<String,List<String>> ReadSign(){
        HashMap<String,List<String>> map=new HashMap<>();

        try (Scanner input=new Scanner(new File(pathSign))) {
            // Iterate by line (not by token) so the loop condition matches
            // the nextLine() call below; blank lines are filtered in line().
            while (input.hasNextLine()){
                line(input.nextLine(),map);
            }
        }catch (IOException ex){
            ex.printStackTrace();
        }

        return map;
    }

    /**
     * Restores spaces: every {@code '%'} in the token becomes a space.
     *
     * @param str the token as stored in the file
     * @return the token with each {@code '%'} replaced by {@code ' '}
     */
    private static String reduction(String str){
        return str.replace('%',' ');
    }

    /**
     * Splits one line into a key and a list of values and stores them in the map.
     *
     * The first whitespace-separated token is the key; every following token
     * is passed through {@link #reduction(String)} and appended to the value
     * list. A line without any token is skipped and leaves the map untouched.
     *
     * @param line the raw line to split
     * @param map  the map that receives the entry
     */
    private static void line(String line, Map<String,List<String>> map){
        try (Scanner input=new Scanner(line)) {
            // A blank line has no key token; calling next() on it would
            // throw NoSuchElementException, so skip it instead.
            if (!input.hasNext()){
                return;
            }

            String web=input.next();

            List<String> signs=new ArrayList<>();
            while (input.hasNext()){
                signs.add(reduction(input.next()));
            }

            map.put(web,signs);
        }
    }

    /**
     * Lists all crawled HTML files in the directory at {@code pathHTML}.
     *
     * @return the directory's entries, or an empty array when the path is not
     *         a directory or cannot be listed (never {@code null})
     */
    public static File[] ReadHTML(){
        File[] files=new File(pathHTML).listFiles();

        // listFiles() yields null for a missing or non-directory path;
        // hand callers an empty array so they can iterate safely.
        return files!=null ? files : new File[0];
    }
}
